Salmon LHT data for DIASPARA 2.2

Author

Viktor Thunell

Published

September 4, 2025

Load libraries

Show code
# Load libraries, install if needed
pkgs <- c("tidyverse", "tidylog", "devtools","viridis","patchwork", "sdmTMB", "stringi","readxl","broom") 

if(length(setdiff(pkgs,rownames(installed.packages()))) > 0){
  
    install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = T)
  
  }

invisible(lapply(pkgs, library, character.only = T))

options(ggplot2.continuous.colour = "viridis")
#theme_set(theme_grey()) # check why global theme option not working when rendering
devtools::source_url("https://raw.githubusercontent.com/VThunell/diasp-lht/main/R/functions/map-plot.R") # message of SHA-1 hash for file

# Set path
home <- here::here()

1. Read data

1a. Read Length at age data

Four data sets build the length at age data:

  1. Sweden SLU database “Sötebasen”
    • At locations across Sweden on both the Baltic side and in the Western sea
    • From recreational and commercial catches and scientific surveys
    • We assume these are fish returning to spawn but catches are sometime coastal(!)
    • Coordinates of sai_location (river or sometimes region, i.e. Baltic Sea or West coast). This is not the exact catch place.
  2. Finland back calculated growth data
    • Rod, trap and netting methods.
    • From river Tornionjoki with tributaries.
  3. Finland catch data
    • GEAR
    • From river Tornionjoki with tributaries
    • Individuals hatched in 1994 have been excluded as there are uncertainties about origin (wild or reared).
  4. France catch data
    • Recreational and commercial fishing and scientific trapping
    • from rivers throughout the French Atlantic coast
    • Juvenile salmon smolt from traps and electrofished parr
  5. Latvia data

The data sets is read below and variable names of interest (age, length, sex, fi_year, sai_location, origin) are standardized.

The data is filtered and cleaned in Section 4 when combining data.

Show code
# Sweden SLU database "Sötebasen"
swe.sallaa <- read_delim(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/LaxindividerSötebasen.csv", delim = ";", locale = locale(encoding="latin1"), col_types = cols(Märkning2 = col_character(), Märke2Nr = col_character(), ÅlderLek3 = col_character())) %>%
  # remove individuals without length or sea age
  rename(sea_age_year = AdultÅlder,
         juvenile_age_year = SmoltÅlder,
         length_mm = Längd,
         weight_g = Vikt,
         fi_year = Årtal,
         origin = Ursprung,
         sai_location = VattenNamn,
         stage = Stadium
         ) %>%
  mutate(sai_cou_code = "SWE",
         date = as.Date(FångstDatum),
         sex = if_else(Kön %in% c("m","f"), Kön, NA),
         origin = case_when(origin == "Odlad" ~ "reared",
                            origin == "Vild" ~ "wild",
                            .default = origin))#,
         # stage = case_when(stage == c("Smolt2 (mellanstadium)","Smolt3 (typisk smolt)","Smolt1 (något stirrlik )","Smolt0 (mycket stirrlik)","Smolt") ~ "SM",
         #                   stage == c("Adult","Lekfisk") ~ "S",
         #                   sea_age_year >= 0 ~ "S",
         #                   juvenile_age_year >= 0 ~ "SM",
         #                    .default = NA))
# Columns that need to be fixed if used: Märkning2, Märke2Nr, ÅlderLek3

# swe.sallaa %>%
#   #filter(is.na(stage)) %>%
#   #filter(!is.na(length_mm)) %>%
#   filter(!(is.na(juvenile_age_year) & is.na(sea_age_year))),
#          !is.na(juvenile_age_year))
#   count(stage)

# Finnish back calculated growth
fin.sallaa <- read_delim(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Tornionjoki_growth measurements_1970s-.txt", delim = "\t", locale = locale(encoding="latin1", decimal_mark = ",")) %>%
  rename(sea_age_year = `SEA-AGE`,
         juvenile_age_year = `SMOLT AGE`,
         length_mm = LENGTH,
         weight_g = WEIGHT,
         fi_year = YEAR,
         ) %>%
  # assuming 2 is female (larger mean length)
  mutate(sex = if_else(SEX == 2, "f", "m", missing = NA),
         date = make_date(fi_year, MONTH, DAY),
         sai_cou_code = "FIN",
         origin = NA,
         sai_location = "Tornionjoki")

# Finnish catch data
fin.sallaa2 <- read_csv(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Non-kelts, data-Table 1.csv") %>% 
  rename(sea_age_year = `SEA-AGE`,
         juvenile_age_year = `SMOLT AGE`,
         length_mm = `LENGTH mm`,
         weight_g = `WEIGHT grams`,
         fi_year = YEAR,
         origin = `LIKELY ORIGIN (uusittu smolttidataa vast)`
         ) %>%
  mutate(sex = if_else(SEX == 2, "f", "m", missing = NA), 
         sai_cou_code = "FIN",
         date = make_date(fi_year, MONTH, DAY),
         # likely origin where 5 == uncertain becomes NA 
         origin = case_when(origin == 1 ~ "wild",
                            origin %in% c(2,3,4) ~ "reared",
                            .default = NA),
         sai_location = "Tornionjoki")

# France length at age
fra.sallaa <- read_csv2(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/SAT_TailleAge_France2.csv", locale = locale(encoding="latin1")) %>%
  rename(sea_age_year = sea_age,
         juvenile_age_year = smolt_age,
         ) %>%
  mutate(date = dmy_hm(cam_date_heure_fin, truncated = 2),
         fi_year = year(date),
         sai_location = str_to_title(sita_nom),
         origin = NA,
         sai_cou_code = "FRA",
         # WGNAS stock unit
         stock.unit = "France") 

# France juveniles
fra.sallaa2 <- read_excel("/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/SAT_TailleAge_juv_ORE_forViktor.xlsx") %>%
  rename(juvenile_age_year = age,
         sai_lfs_code = stage
         ) %>%
  mutate(date = parse_date_time(cam_date_heure_fin, orders = c("ymd", "ymd HMS")),
         sex = if_else(sxe_code_ref %in% c("M","F"), str_to_lower(sxe_code_ref), NA),
         date = floor_date(date, "minute"),
         fi_year = year(date),
         sai_location = str_to_title(sita_nom),
         sai_cou_code = "FRA",
         # WGNAS stock unit
         stock.unit = "France")

# Latvia catch data
lva.sallaa <- read_excel("/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/original/DIASPARA-Tables-Salmon-Data-LV.xlsx", sheet = 3) %>%
  mutate(fi_datecapture = as.character(fi_datecapture)) %>%
  left_join(read_excel("/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/original/DIASPARA-Tables-Salmon-Data-LV.xlsx", sheet = 2) %>%
              filter(if_any(everything(), ~ !is.na(.))), by = "sai_name") %>%
  mutate(sex = if_else(`is_female(0=male, 1=female)` == 1, "f", "m", missing = NA), 
         date = as.Date(fi_datecapture),
         origin = if_else(`presence_adipose_fin(0=absent, 1=present)` == 1, "wild","reared"),
         juvenile_age_year = str_replace(juvenile_age_year, "\\+",""),
         juvenile_age_year = as.numeric(juvenile_age_year),
         fi_year = year(fi_datecapture),
         # chnage x to y and y to x. 
         pl = fisa_x_4326, 
         fisa_x_4326 = fisa_y_4326,
         fisa_y_4326 = pl)

# Ireland catch data
ie.sallaa <- read_excel("/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/original/DIASPARA-Tables-Salmon-Data-request - UPDATED sheet Burrishoole.xlsx", sheet = 3) %>%
  mutate(fi_datecapture = as.character(fi_datecapture)) %>%
  left_join(read_excel("/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/original/DIASPARA-Tables-Salmon-Data-request - UPDATED sheet Burrishoole.xlsx", sheet = 2) %>%
              filter(if_any(everything(), ~ !is.na(.))), by = "sai_name") %>%
  mutate(sex = if_else(`is_female(0=male, 1=female)` == 1, "f", "m", missing = NA), 
         date = as.Date(fi_datecapture),
         origin = if_else(`presence_adipose_fin(0=absent, 1=present)` == 1, "wild","reared"),
         juvenile_age_year = str_replace(juvenile_age_year, "\\+",""), #remove plus 
         juvenile_age_year = parse_number(juvenile_age_year),
         fi_year = year(fi_datecapture))

# str(swe.sallaa)
# str(fin.sallaa)
# str(fin.sallaa2)
# str(fra.sallaa)
# str(fra.sallaa2)
# str(lva.sallaa)
# str(ie.sallaa)

1b. Read Fecundity at length data

Six data sets build the fecundity length data:

  • Sweden 1 - Baltic Sea
    • Collected in river Umeälven (and tributary Vindelälven) and Dalälven
    • Total fecundity available (stripped + dissected). I.e. All eggs are counted!
    • Also trout data that is filtered out.
  • Sweden 2 - Swedish west coast
    • Collected in rearing station in river Göta älv
    • All fin-clipped individuals.
    • Only stripped (from what V.T. knows, methods description lacking atm)
    • Not total fecundity as in Sweden 1
  • Finland 1
    • Described in the report “HYDROACOUSTIC ASSESSMENT OF SALMON IN THE RIVER TORNIONJOKI - FINAL REPORT, EU STUDY PROJECT 96-069”
    • Total fecundity available (stripped + dissected). I.e. an estimation of all eggs!
    • From river Tornionjoki
    • Both reared (finclipped) and wild individuals (adipose fin intact) but the majority are wild.
  • Finland 2
    • From river Tornionjoki and Simojoki
    • wild, reared and NA mix of individuals
  • France
    • Described in the Samarch report “Changes in sex ratio and fecundity of salmonids” (M. Nevoux et al. 2020, Deliverable D3.3.1)
    • Methods for assessing fecundity is stripping and a subset of the volume (or weight) of eggs were counted, then the total fecundity was extrapolated based on the total volume (weight) of the stripped eggs (pers. com M. Nevoux)
    • From 13 rivers in three regions
    • Origin should mainly be wild (pers. com. M. Nevoux)

The data is filtered in Section 4 when combining data.

Show code
# Fecundity Sweden Baltic
swe.salfec <- read_csv(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Fekunditetsdata_Dal_Ume_20241213.csv") %>%
  filter(Species == "Salmon") %>%
  rename(n.eggs = `No eggs total`,
         length_mm = `Length (cm)`,
         weight_g = `Weight before stripping (kg)`,
         origin = `Wild/Reared`,
         sai_location = River,
         ) %>%
  mutate(origin = tolower(origin),
         sai_cou_code = "SWE",
         fi_year = as.numeric(Year), 
         # to grams 
         weight_g = weight_g*1000, 
         n.eggs = as.numeric(str_remove_all(n.eggs, " ")),
         # to mm 
         length_mm = if_else(is.na(length_mm), NA, length_mm*10))
        
# Fecundity Sweden Göta älv 2024
swe.salfec2 <- read_csv(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/fecundity_Gotaalv_2024.csv") %>%
  rename(n.eggs = `ROM SKATTA ANTAL (ST)`,
         length_mm = `Längd (cm)`,
         weight_g = `Vikt (kg)`,
         ) %>%
  mutate(origin = if_else(fettfena == "ej","reared","wild"),
         sai_location = "Göta älv",
         sai_cou_code = "SWE",
         fi_year = 2024, 
         # to grams 
         weight_g = weight_g*1000,
         # to mm
         length_mm = length_mm*10) 

# Fecundity Finland 1996-1998
fin.salfec1 <- read_delim(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Tornionjoki_1996-1998_fecundity.csv") %>%
  rename(n.eggs = `TOTAL FECUNDITY (EXCL. UNCERTAIN OOZYTES)`,
         length_mm = LENGTH,
         weight_g = WEIGTH,
         fi_year = YEAR
        ) %>%
  mutate(sai_cou_code = "FIN",
         sai_location = "Tornionjoki",
         origin = case_when(`ADIPOSE FIN (1=CUT, 2=INTACT)` == "2" ~ "reared",
                            `ADIPOSE FIN (1=CUT, 2=INTACT)` == "1" ~ "wild",
                            `ADIPOSE FIN (1=CUT, 2=INTACT)` == "0" ~ "uncertain",
                            .default = as.character(`ADIPOSE FIN (1=CUT, 2=INTACT)`)))

# Fecundity Finland 2006
fin.salfec2 <- read_csv(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Tornionjoki_fecundity_2006.csv") %>%
  rename(origin=`ORIGIN (1=WILD 2=REARED)`) %>%
  mutate(sai_location = "Tornionjoki",
         origin = case_when(origin == "2" ~ "reared",
                            origin == "1" ~ "wild",
                            .default = NA)) %>%
  bind_rows(read_csv(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Simojoki_fecundity_2006.csv") %>%
               mutate(sai_location = "Simojoki",
                      origin = NA) ) %>%
  rename(length_mm = `LENGTH (mm)`,
         n.eggs = `number eggs`) %>%
  mutate(fi_year = 2006,
         sai_cou_code = "FIN") 

# Fecundity France
fra.salfec <- read_csv(file = "/Users/vitl0001/Documents/Projects/DIASPARA/Incoming data/salmon/Fecondite_SAT_Bilan.csv") %>%
  rename(n.eggs = Fecondite,
         length.f = Lf,
         length.t = Lt,
         weight_g = Poids,
         fi_year = Annee,
         sai_location = Origine) %>%
  mutate(sai_cou_code = "FRA",
         origin = NA)

# str(swe.salfec)
# str(swe.salfec2)
# str(fin.salfec1)
# str(fin.salfec2)
# str(fra.salfec)

2. Spatial aggregations - Baltic Assessment and Atlantic Stock units and Regions

To add spatial information, sai_location name in the data is matched (dplyr::left_join():ed) with sai_location name in the tables created below (SweFin.rivers and French.rivers).

The spatial information added is:

  • Coordinates (WGS84 DD). French coordinates either provided by Hilaire Drouineau or added manually by V.T., they are at the river mouth (entering the sea or from a tributary) or for the Baie Mont St Michelle in the center of the bay. These are not catch places. For many tributary rivers, the main river mouth is used and for those rivers,the names are changed in 3a when combining data. Swedish coordinates eother existing in the data or added manually (river mouth, not catch place).
  • Stock unit (for WGNAS stock unit Sweden, Ireland, and France),
  • Assessment unit (for WGBAST),
  • Region and genetic region from French regions from Perrier et al. 2011 (doi: 10.1111/j.1365-294X.2011.05266.x), and provided by M. Nevoux, existing in the french fecundity data or added manually by V.T with input from M. Nevoux. Swedish regions are defined as Baltic Sea and Swedish west coast and Finnish is Baltic Sea.
  • All Irish data share one coordinate which represents where the Burrishoole enters the sea.
Show code
# Assessment units of the index rivers in Sweden and Finland. 
AU.rivers = bind_cols(sai_location = c("Tornionjoki","Simojoki","Kalixälven","Råneälven","Piteälven","Åbyälven","Byskeälven","Rickleån","Sävarån","Vindelälven","Öreälven","Lögdeälven","Ljungan","Mörrumsån","Emån", "Kågeälven","Testeboån", "Umeälven", "Dalälven", "Luleälven","Muonionjoki"),
                      asses.unit = c(1,1,1,1,2,2,2,2,2,2,2,2,3,4,4,2,3,2,3,2,1), 
                      stock.origin = "wild") %>%
  bind_rows(bind_cols(sai_location = c("Torneälven_hatchery","Luleälven_(RG_with_Pite)","Iijoki","Oulujoki","Skellefteälven","Umeälven_(RG_with_Vindel)","Ångermanälven","Indalsälven_(RG_with_Ljungan)","Ljusnan","Dalälven_(RG_with_Testeboån)", "Torneälven","Gideälven"), 
                      asses.unit = c(1,2,1,1,2,2,3,3,3,3,1,2), 
                      stock.origin = "reared")) 

# And the Swedish rivers entering the the Western sea, i.e. WGNAS stock unit "Sweden".
SU.rivers = bind_cols(sai_location = c("Ätran","Örekilsälven","Göta älv","Lagan","Västerhavet (hela) ICES SD 20-21","Genevadsån","Fylleån","Stensån"),
                      stock.unit = "Sweden", 
                      stock.origin = NA)

# Add AU and SU to lat lons from Swedish sötebasen
SweFin.rivers <- swe.sallaa %>%
  drop_na(length_mm) %>%
  distinct(sai_location, WGS84_N_Vatten, WGS84_E_Vatten) %>%
  rename(fisa_y_4326 = WGS84_N_Vatten,
         fisa_x_4326 = WGS84_E_Vatten) %>%
  mutate(fisa_y_4326 = case_when(sai_location == "Östersjön (hela) ICES SD 22-32" ~ 58.475309,
                         .default = fisa_y_4326),
         fisa_x_4326 = case_when(sai_location == "Östersjön (hela) ICES SD 22-32" ~ 19.780140,
                         .default = fisa_x_4326)) %>%
  bind_rows(data.frame(sai_location = c("Tornionjoki", "Simojoki", "Muonionjoki")) %>%
              mutate(fisa_y_4326 = case_when(sai_location %in% c("Tornionjoki", "Muonionjoki") ~ 65.879905,
                                     sai_location == "Simojoki" ~ 65.625639,
                                     sai_location == "Östersjön (hela) ICES SD 22-32" ~ 58.475309,
                                     sai_location == "Gideälven" ~ 63.327482,
                                     .default = NA),
                     fisa_x_4326 = case_when(sai_location %in% c("Tornionjoki", "Simojoki", "Muonionjoki") ~ 24.136424,
                                     sai_location == "Simojoki" ~ 25.052169,
                                     sai_location == "Östersjön (hela) ICES SD 22-32" ~ 19.780140,
                                     sai_location == "Gideälven" ~ 19.140244,
                                      .default = NA))) %>%
  left_join(AU.rivers) %>%
  left_join(SU.rivers) %>%
  mutate(region = if_else(stock.unit == "Sweden", "Swedish.westcoast", "Baltic.sea", missing = "Baltic.sea" )) 

# French rivers by region and sub-regions from Marie Nevoux
fra.rivers <- read_csv(file =  "/Users/vitl0001/Documents/Projects/DIASPARA/riviere_region_France.csv") %>%
  mutate(sai_location = str_to_title(river),
         stock.unit = "France")

# French sai_location abbreviations for regional genotypic aggregations from Perrier et al. 2011
fra.rivabb <- read_delim(file =  "/Users/vitl0001/Documents/Projects/DIASPARA/french_genotypes_Perrier.txt", delim = "\t") %>% 
  rename(sai_location = River) %>%
  mutate(sai_location.abb = str_to_upper(str_sub(sai_location,start = 1, end = 3)),
         sai_location.abb = case_when(sai_location == "NIVE"  ~ "NIE",
                               sai_location == "NIVELLE"  ~ "NIL",
                               .default = sai_location.abb),
         sai_location = str_to_title(sai_location)) %>%
  mutate(region.gen = case_when(sai_location.abb %in% c("COU","TRI","DOU","LEG","STE","AUL","GOY","ELO","ELL","PEN","ODE","AVE","JET","SCO","BLA") ~ "Brittany",
                            sai_location.abb %in% c("ORN", "VIR","SEI","SAI","SIE","SEL","SEE") ~ "Lower-Normandy",
                            sai_location.abb %in% c("NIL","NIE","GAV") ~ "Adour",
                            sai_location.abb %in% c("GAR","DOR","ALL") ~ "Allier-Gironde",
                            sai_location.abb %in% c("TOU","VAL","AUT","CAN","BRE","ARQ") ~ "Upper-Normandy",
                            .default = NA))

# French sai_location coordinates from Hilaire Drouineau
fra.rivers.sf <- read_sf("/Users/vitl0001/Documents/Projects/DIASPARA/salmon_frarivers", stringsAsFactors = FALSE)

# Combine all French sai_location info
fra.rivers2  <- fra.rivers %>%
  full_join(fra.rivabb) %>%
  bind_rows(# adding missing rivers from fra.sallaa
            tibble(sai_location = c("Isole", "Etel", "Quillec", "Horn", "St Laurent"), region.gen = "Brittany", stock.unit = "France"),
            tibble(sai_location = c("Baie Du Mont Saint Michel","Thar"), region.gen = "Lower-Normandy", stock.unit = "France"),
            tibble(sai_location = c("Loire"), region.gen = "Allier-Gironde", stock.unit = "France"),
            tibble(sai_location = c("Durdent"), region.gen = "Upper-Normandy", stock.unit = "France")) %>% 
  left_join(st_coordinates(fra.rivers.sf) %>%
              as.data.frame() %>%
              rename(fisa_x_4326 = X,
                     fisa_y_4326 = Y) %>%
              bind_cols(st_set_geometry(fra.rivers.sf, NULL)) %>% rename(sai_location = "river")) %>%
  # assign region.gen to those missing
  mutate(region.gen = case_when(sai_location == "Oir" ~ "Lower-Normandy", 
                                subregion == "Adour" & is.na(sai_location.abb) ~ "Adour",
                                subregion == "Gironde" & is.na(sai_location.abb) ~ "Allier-Gironde",
                                region == "Bretagne" & is.na(sai_location.abb) ~ "Brittany",
                                .default = region.gen)) %>%
  # remove those " + affl" which are tributaries, Gave Mauleon (Le Saison) which is Gave Mauleon, "Gave'oloron duplictae and See Selune which are exists individually.
  filter(!sai_location %in% c("See Selune","Odet + Affl","Elle + Affl","Gave Mauleon (Le Saison)", "Gave D'oloron") ) %>% 
  # add fisa_y_4326 and lons where missing.
  mutate(fisa_y_4326 = case_when(sai_location == "Baie Du Mont Saint Michel" ~ 48.655943,
                         sai_location == "Valmont" ~ 49.761966,
                         sai_location == "Seine" ~ 49.435474,
                         sai_location == "Isole" ~ 47.874431,
                         sai_location == "Loire" ~ 47.281585,
                         sai_location == "Finistere" ~ 48.306467, 
                         sai_location == "Etel" ~ 47.656579,
                         sai_location == "Quillec" ~ 48.685033,
                         sai_location == "Etel" ~ 47.656579,
                         sai_location == "Thar" ~ 48.800103,
                         sai_location == "Loire" ~ 47.281585,
                         sai_location == "Horn" ~ 48.688119,
                         sai_location == "Durdent" ~ 48.687806,
                         sai_location == "Canche" ~ 50.527333,
                         sai_location == "Couesnon" ~ 48.625250,
                         sai_location == "St Laurent" ~ 47.903795,
                         .default = fisa_y_4326),
         fisa_x_4326 = case_when(sai_location == "Baie Du Mont Saint Michel" ~ -1.656370,
                         sai_location == "Valmont" ~ 0.377126,
                         sai_location == "Seine" ~ 0.285060,
                         sai_location == "Isole" ~ -3.546855,
                         sai_location == "Loire" ~ -2.152414,
                         sai_location == "Finistere" ~ -4.080223,
                         sai_location == "Etel" ~ -3.209520,
                         sai_location == "Quillec" ~ -4.069429,
                         sai_location == "Thar" ~ -1.568264,
                         sai_location == "Loire" ~ -2.152414,
                         sai_location == "Horn" ~ -4.058391,
                         sai_location == "Durdent" ~ 0.608712,
                         sai_location == "Canche" ~ 1.614964,
                         sai_location == "Couesnon" ~ -1.511461,
                         sai_location == "St Laurent" ~ -3.945979,
                         .default = fisa_x_4326)) %>%
  # remove non-necessary info
  select(-subregion,-region,-id,-sai_location.abb) 

3. Length at age

3a. Filter, clean and combine data

  • Remove NA values in length AND age.
  • Remove individuals from Swedish lakes
  • Correct an obvious outlier in the Swedish length data
  • Remove three mark-recaptured individuals in Swedish data
  • Remove an obvious outlier in the French data
  • Correct an obvious outlier in the French data
  • Calculate total length (\(L_t\)) from fork length (\(L_f\)) where needed in the French and Latvian data based on the model: \(exp(0.2892351)*L_f^{0.9623479}\) (based on French salmon where we have both Lt and Lf, see below)
  • The french data has two sources of sex identification (observed in the field vs genetic). I use Genetic sex where available and complete these data with field observations. M. Nevoux considers field observations correct only from (mid) August. From the data where both genetic and field method are available, 14% are incorrect. Keep this in mind if using this info. The sex in Finnish (and likely in Swedish data) is determined visually when gutting the fish and far from all are determined (many NAs).

The kept variables are: length, sai_location, sai_cou_code, origin, fi_year, age_ad, sex. Spatial varaibles are added to the data from the tables created in 2.

Show code
# 14 % of the sex determinations in th field are wrong.
fra.sallaa %>%
  drop_na(`Genetic sex`, `Sex observed in the field`) %>%
  rename(gs = `Genetic sex`,
         fs = `Sex observed in the field`) %>%
  filter(gs != fs) %>%
  summarise(perc.incorr = 100*n()/nrow(fra.sallaa %>% drop_na(`Genetic sex`, `Sex observed in the field`)))
drop_na: removed 68,383 rows (95%), 3,738 rows remaining
rename: renamed 2 variables (fs, gs)
filter: removed 3,031 rows (81%), 707 rows remaining
drop_na: removed 68,383 rows (95%), 3,738 rows remaining
summarise: now one row and one column, ungrouped
# A tibble: 1 × 1
  perc.incorr
        <dbl>
1        18.9
Show code
# Model to convert fork length in the French data to total length. V.T. models this relationship using a log-linear model (Lt = a*Lf^b) to estimate a and b. Lt ~ Lf is almost linear but a log(Lt)~log(Lf) relationship makes for a seemingly better fit.
fra.sallaa %>%
  filter(total_length > 100) %>% 
  drop_na(total_length, fork_length) %>%
  lm(log(total_length) ~ log(fork_length), data = .) %>%
  tidy() %>%
  pull(estimate) 
filter: removed 22,077 rows (31%), 50,044 rows remaining
drop_na: removed 49,373 rows (99%), 671 rows remaining
[1] 0.2892351 0.9623479
Show code
#looks good:
fra.sallaa %>%
  filter(total_length > 100) %>% # an obvious outlier where Lf >> Lt
  drop_na(total_length, fork_length) %>%
  ggplot(aes(fork_length, total_length)) +
  geom_point() +
  geom_line(aes(x = fork_length,  y = exp(0.2892351)*fork_length^0.9623479), col = "red") +
  labs(title = "fork to total length fit length at age")
filter: removed 22,077 rows (31%), 50,044 rows remaining
drop_na: removed 49,373 rows (99%), 671 rows remaining

Show code
# There are three mark-recaptured individuals in Swedish db Sötebasaen when NA lengths and ages are removed. As they are so few: Id them and remove the first age-length measurement and keep later one (when they are bigger). They are all recaptured within the same year.
dup.markrec <- swe.sallaa %>%
  # remove NAs
  drop_na(length_mm, sea_age_year, MärkeNr) %>%
  # find the mark-recaptured ones by counting rows by tag number 
  mutate(n = n(), .by = MärkeNr) %>% 
  # filter the mark-recaps and those with info that is not tags ("finclipped")
  filter(n > 1 & !MärkeNr == "Fenklippt") %>%
  # id the mark (shorter) instance
  slice_min(length_mm, by = MärkeNr)
drop_na: removed 105,549 rows (>99%), 300 rows remaining
mutate: new variable 'n' (integer) with 3 unique values and 0% NA
filter: removed 294 rows (98%), 6 rows remaining
slice_min: removed 3 rows (50%), 3 rows remaining
Show code
# fix and combine swedish and finnish data
swefin.sallaa <- 
  swe.sallaa %>%
  # remove the mark-recaps
  anti_join(dup.markrec) %>%
  # remove the lakes in the data
  filter(!sai_location %in% c("Vättern", "Vänern")) %>% 
  drop_na(length_mm) %>%
  mutate(length_mm = ifelse(length_mm > 2000, length_mm/10, length_mm)) %>%
  dplyr::select(sai_cou_code, fi_year, sai_location, origin, length_mm, weight_g, AU, sea_age_year, juvenile_age_year, sex, date) %>%
  bind_rows(fin.sallaa %>% 
              # remove individuals without length or sea age
              drop_na(length_mm) %>% 
              dplyr::select("sai_cou_code", "fi_year", "sai_location", "origin", "length_mm", "weight_g", "sea_age_year", "juvenile_age_year", "sex")) %>%
  bind_rows(fin.sallaa2 %>%
              # remove individuals without length or sea age
              drop_na(length_mm) %>% 
              dplyr::select(sai_cou_code, fi_year, sai_location, origin, length_mm, weight_g,sea_age_year,juvenile_age_year,sex, date)) %>%
  left_join(SweFin.rivers, by = "sai_location") %>%
  # prefer the existing AU before the new one.
  mutate(asses.unit = if_else(is.na(AU), asses.unit, AU)) %>% 
  dplyr::select(!AU)
Joining with `by = join_by(InsamlingID, Serie, InsamlMetod, AnstrTyp,
IndividID, sai_location, RT90_X_Vatten, RT90_Y_Vatten, S99TM_N_Vatten,
S99TM_E_Vatten, WGS84_N_Vatten, WGS84_E_Vatten, Plats, RT90_X_Plats,
RT90_Y_Plats, S99TM_N_Plats, S99TM_E_Plats, WGS84_N_Plats, WGS84_E_Plats,
Subdiv, AU, Syfte, fi_year, FångstDatum, Art, Åldersprov, IndividNr, length_mm,
weight_g, Behandling, Kön, stage, Genprov, Märkning, MärkeNr, Märkning2,
Märke2Nr, origin, juvenile_age_year, sea_age_year, Pluszon, AntalLek,
ÅlderLek1, ÅlderLek2, ÅlderLek3, Tydlighet, AnmÅlder, sai_cou_code, date, sex)`
anti_join: added no columns
> rows only in x 105,846
> rows only in dup.markrec ( 0)
> matched rows ( 3)
> =========
> rows total 105,846
filter: removed 36,152 rows (34%), 69,694 rows remaining
drop_na: removed 3,682 rows (5%), 66,012 rows remaining
mutate: changed one value (<1%) of 'length_mm' (0 new NAs)
drop_na: removed 254 rows (18%), 1,125 rows remaining
drop_na: removed 771 rows (5%), 13,908 rows remaining
left_join: added 6 columns (fisa_y_4326, fisa_x_4326, asses.unit, stock.origin,
stock.unit, …)
> rows only in x 0
> rows only in SweFin.rivers ( 4)
> matched rows 81,051 (includes duplicates)
> ========
> rows total 81,051
mutate: changed 1,215 values (1%) of 'asses.unit' (1,140 fewer NAs)
Show code
# fix Latvia data
lva.sallaa.b <- lva.sallaa %>% 
              mutate(length_mm = if_else(`method_length(0=total,1=fork length)` == 1, exp(0.2892351)*length_mm^0.9623479, length_mm)) %>%
              dplyr::select(sai_cou_code,fi_year,sai_location,origin,length_mm,weight_g,sea_age_year,juvenile_age_year,sex,fisa_y_4326,fisa_x_4326,date) %>% 
              mutate(asses.unit = 5,
                     region = "Baltic.sea")
mutate: changed 27,430 values (>99%) of 'length_mm' (0 new NAs)
mutate: new variable 'asses.unit' (double) with one unique value and 0% NA
        new variable 'region' (character) with one unique value and 0% NA
Show code
# fix combine fra.sallaa and fra.sallaa2 
fra.sallaa.b <- fra.sallaa %>% 
  rename(gen.sex = `Genetic sex`,
         field.sex = `Sex observed in the field`) %>%
  mutate(# remove french accents and hyphens
    sai_location = stringi::stri_trans_general(sai_location, "Latin-ASCII"),
    sai_location = str_replace_all(sai_location,"-"," "),
    # changing tributaries to main sai_location 
    sai_location = case_when(sai_location %in% c("Varenne","Bethune") ~ "Arques", 
                             sai_location %in% c("Inam") ~ "Elle",
                             sai_location %in% c("Austreberthe") ~ "Seine",
                             sai_location %in% c("Arroux", "Allier") ~ "Loire",
                             sai_location %in% c("Jet","Steir") ~ "Odet",
                             .default = sai_location),
    # Using observed sex in the field when genetic is missing to complete info
    sex = str_to_lower(if_else(is.na(gen.sex), field.sex, gen.sex)), 
    # correct a "1" valued entry to NA
    sex = if_else(sex %in% c("f","m"), sex, NA),
    # calculate total from fork length and correct TL outlier: 51 mm and 2 yo
    length_mm = if_else(is.na(total_length) | total_length == 51, exp(0.2892351)*fork_length^0.9623479, total_length), 
    # Correct one ind. at 7900 mm and assume it is 790 mm
    length_mm = ifelse(length_mm > 2000, length_mm/10, length_mm),
    ) %>% 
  drop_na(length_mm) %>%
  dplyr::select(sai_cou_code,fi_year,sai_location, origin, length_mm, sea_age_year,juvenile_age_year,sex,date) %>%
  mutate(weight_g = NA) %>%
  bind_rows(fra.sallaa2 %>% 
              mutate(origin = "wild",
                     sea_age_year = NA,
                     length_mm = exp(0.2892351)*fork_length^0.9623479,
                     juvenile_age_year = as.numeric(if_else(juvenile_age_year == "NA", NA, juvenile_age_year))) %>%
              dplyr::select(sai_cou_code, fi_year, sai_location, origin, length_mm, sea_age_year,juvenile_age_year, sex, sai_lfs_code,date) %>%
              mutate(weight_g = NA)) %>%
  left_join(fra.rivers2)
rename: renamed 2 variables (field.sex, gen.sex)
mutate: changed 14,902 values (21%) of 'sai_location' (0 new NAs)
        new variable 'sex' (character) with 3 unique values and 42% NA
        new variable 'length_mm' (double) with 741 unique values and 0% NA
drop_na: no rows removed
mutate: new variable 'weight_g' (logical) with one unique value and 100% NA
mutate: converted 'juvenile_age_year' from character to double (57235 new NA)
        new variable 'origin' (character) with one unique value and 0% NA
        new variable 'sea_age_year' (logical) with one unique value and 100% NA
        new variable 'length_mm' (double) with 207 unique values and 0% NA
mutate: new variable 'weight_g' (logical) with one unique value and 100% NA
Joining with `by = join_by(sai_location)`
left_join: added 5 columns (river, stock.unit, region.gen, fisa_x_4326, fisa_y_4326)
           > rows only in x                  0
           > rows only in fra.rivers2 (     12)
           > matched rows              196,054    (includes duplicates)
           >                          =========
           > rows total                196,054
Show code
# Fix Irish data
ie.sallaa.b <- ie.sallaa %>%
  mutate(length_mm = if_else(`method_length(0=total,1=fork length)` == 1,
                             exp(0.2892351)*length_mm^0.9623479, length_mm),
           fisa_x_4326 = as.numeric(gsub("\u2013|\u2014", "-", fisa_x_4326))
         ) %>%
  dplyr::select(sai_cou_code,fi_year,sai_location,origin,length_mm,weight_g,sea_age_year,juvenile_age_year,sex,fisa_y_4326,fisa_x_4326,date) %>%
  mutate(stock.unit = "Ireland",
         region = "Burrishoole")
mutate: converted 'fisa_x_4326' from character to double (0 new NA)
        changed 2,965 values (100%) of 'length_mm' (0 new NAs)
mutate: new variable 'stock.unit' (character) with one unique value and 0% NA
        new variable 'region' (character) with one unique value and 0% NA
Show code
all.sallaa <- swefin.sallaa %>%
  bind_rows(fra.sallaa.b, lva.sallaa.b, ie.sallaa.b)

str(all.sallaa)
tibble [307,558 × 19] (S3: tbl_df/tbl/data.frame)
 $ sai_cou_code     : chr [1:307558] "SWE" "SWE" "SWE" "SWE" ...
 $ fi_year          : num [1:307558] 2015 2015 2015 2015 2015 ...
 $ sai_location     : chr [1:307558] "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" ...
 $ origin           : chr [1:307558] "wild" "wild" "wild" "reared" ...
 $ length_mm        : num [1:307558] 990 840 860 880 860 1000 860 820 830 840 ...
 $ weight_g         : num [1:307558] 930000 540000 590000 440000 500000 830000 550000 510000 530000 550000 ...
 $ sea_age_year     : num [1:307558] 4 2 2 3 3 3 2 2 3 2 ...
 $ juvenile_age_year: num [1:307558] 3 3 2 NA 4 3 3 3 2 3 ...
 $ sex              : chr [1:307558] "m" "f" "f" "f" ...
 $ date             : POSIXct[1:307558], format: "2017-09-27" "2015-06-14" ...
 $ fisa_y_4326      : num [1:307558] 58.5 58.5 58.5 58.5 58.5 ...
 $ fisa_x_4326      : num [1:307558] 19.8 19.8 19.8 19.8 19.8 ...
 $ asses.unit       : num [1:307558] NA NA NA NA NA NA NA NA NA NA ...
 $ stock.origin     : chr [1:307558] NA NA NA NA ...
 $ stock.unit       : chr [1:307558] NA NA NA NA ...
 $ region           : chr [1:307558] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ sai_lfs_code     : chr [1:307558] NA NA NA NA ...
 $ river            : chr [1:307558] NA NA NA NA ...
 $ region.gen       : chr [1:307558] NA NA NA NA ...

3b. Create spatial units

Show code
all.sallaa <- all.sallaa %>%
  mutate(spat.unit = case_when(region == "Baltic.sea" ~ paste0(region,":AU-",asses.unit),
                               region == "Swedish.westcoast" ~ region,
                               region == "Burrishoole" ~ region,
                               .default = region.gen))
mutate: new variable 'spat.unit' (character) with 13 unique values and 0% NA

3c. Make age types Map and summary of length at age

Check combinations of juvenile and sea age in the data

Show code
all.sallaa %>%
  mutate(sea_age_year = if_else(sea_age_year > 0, 1, sea_age_year),
         juvenile_age_year = if_else(juvenile_age_year > 0, 1, sea_age_year)) %>%
  distinct(sea_age_year,juvenile_age_year)
mutate: changed 61,411 values (20%) of 'sea_age_year' (0 new NAs)
        changed 122,936 values (40%) of 'juvenile_age_year' (53,832 new NAs)
distinct: removed 307,551 rows (>99%), 7 rows remaining
# A tibble: 7 × 2
  sea_age_year juvenile_age_year
         <dbl>             <dbl>
1            1                 1
2            1                NA
3           NA                NA
4           NA                 1
5            0                 0
6            0                NA
7            0                 1

Classify age types - sea and smolt age only or both age types present.

Show code
all.sallaa2 <- all.sallaa %>% 
  # create life stage and total age columns
  mutate(age.type = case_when(is.na(sea_age_year) & is.na(juvenile_age_year) ~ NA,
                              is.na(sea_age_year) ~ "juve.only",
                              is.na(juvenile_age_year) ~ "sea.only",
                              sea_age_year >= 0 & juvenile_age_year >= 0 ~ "both",
                              .default = NA))
mutate: new variable 'age.type' (character) with 4 unique values and 30% NA
Show code
# looks good
all.sallaa2 %>%
  mutate(sea_age_year = if_else(sea_age_year > 0, 1, sea_age_year),
         juvenile_age_year = if_else(juvenile_age_year > 0, 1, sea_age_year)) %>%
  distinct(sea_age_year,juvenile_age_year, age.type)
mutate: changed 61,411 values (20%) of 'sea_age_year' (0 new NAs)
        changed 122,936 values (40%) of 'juvenile_age_year' (53,832 new NAs)
distinct: removed 307,550 rows (>99%), 8 rows remaining
# A tibble: 8 × 3
  sea_age_year juvenile_age_year age.type 
         <dbl>             <dbl> <chr>    
1            1                 1 both     
2            1                NA sea.only 
3           NA                NA <NA>     
4           NA                 1 juve.only
5            0                 0 both     
6            0                NA sea.only 
7            0                 1 both     
8           NA                NA juve.only
Show code
# calculate total age
all.sallaa3 <- all.sallaa2 %>%
   mutate(tot_age_year = ifelse(rowSums(!is.na(across(c(juvenile_age_year, sea_age_year)))) == 0,NA,
                         rowSums(across(c(juvenile_age_year, sea_age_year)),
                                 na.rm = TRUE)))
mutate: new variable 'tot_age_year' (double) with 14 unique values and 30% NA
Show code
all.sallaa3 %>%
  drop_na(age.type) %>%
  ggplot(aes(tot_age_year, length_mm, color = sai_cou_code)) +
  geom_point() +
  facet_wrap(~age.type) +
  scale_x_continuous(breaks = seq(0, 13, 1) )
drop_na: removed 91,340 rows (30%), 216,218 rows remaining

Show code
# some fish are too large to be smolts - these need to be corrected
all.sallaa3 %>%
  filter(age.type == "juve.only" & length_mm >= 290) %>%
  distinct(sai_cou_code,sai_location)
filter: removed 304,532 rows (99%), 3,026 rows remaining
distinct: removed 3,008 rows (99%), 18 rows remaining
# A tibble: 18 × 2
   sai_cou_code sai_location                    
   <chr>        <chr>                           
 1 SWE          Ätran                           
 2 SWE          Örekilsälven                    
 3 SWE          Göta älv                        
 4 SWE          Umeälven                        
 5 SWE          Torneälven                      
 6 SWE          Piteälven                       
 7 SWE          Östersjön (hela) ICES SD 22-32  
 8 SWE          Västerhavet (hela) ICES SD 20-21
 9 SWE          Mörrumsån                       
10 FIN          Tornionjoki                     
11 LV           Amata                           
12 LV           Gauja                           
13 LV           Tebra                           
14 LV           Pēterupe                        
15 LV           Vitrupe                         
16 LV           Irbe                            
17 LV           Užava                           
18 LV           Venta                           
Show code
# Fix ages that are not correctly classified
all.sallaa4 <- all.sallaa3 %>%
  # set sea age = juve age  
  mutate(sea_age_year = case_when(age.type == "juve.only" & length_mm >= 290 ~ juvenile_age_year,
                                  .default = sea_age_year),
         # set juven. age = NA,  
         juvenile_age_year = case_when(age.type == "juve.only" & length_mm >= 290 & !is.na(juvenile_age_year) ~ NA,
                                       .default = juvenile_age_year),
         # reclassify their age types
         age.type = case_when(age.type == "juve.only" & length_mm >= 290 ~ "sea.only",
                              .default = age.type)) %>%
  # one "both" that should be sea.anly
  mutate(age.type = if_else(age.type == "both" & length_mm >= 500 & tot_age_year == 0, "sea.only", age.type) )
mutate: changed 3,026 values (1%) of 'sea_age_year' (3,026 fewer NAs)
        changed 3,026 values (1%) of 'juvenile_age_year' (3,026 new NAs)
        changed 3,026 values (1%) of 'age.type' (0 new NAs)
mutate: changed one value (<1%) of 'age.type' (0 new NAs)
Show code
# Check. 
# Id sites with smolts
swj <- all.sallaa4 %>% filter(age.type == "juve.only") %>% distinct(sai_location) %>% pull(sai_location) 
filter: removed 216,628 rows (70%), 90,930 rows remaining
distinct: removed 90,910 rows (>99%), 20 rows remaining
Show code
all.sallaa4 %>%
  drop_na(age.type) %>%
  ggplot(aes(tot_age_year, length_mm, color = age.type)) +
  geom_point() +
  facet_wrap(~age.type) +
all.sallaa4 %>%
  drop_na(age.type) %>%
  filter(sai_location %in% swj) %>%
  ggplot(aes(tot_age_year, length_mm, color = age.type)) +
  geom_point() +
  facet_wrap(~sai_location)
drop_na: removed 91,340 rows (30%), 216,218 rows remaining
drop_na: removed 91,340 rows (30%), 216,218 rows remaining
filter: removed 96,484 rows (45%), 119,734 rows remaining

3d. Calculate continous age based on day of year integer

To account for growth

Show code
# what day of year are the fish that we have age for caught?
all.sallaa4 %>%
  filter(!is.na(tot_age_year)) %>%
  mutate(yday = yday(date)) %>%
  count(yday) %>%   # counts per day
  ggplot(aes(x = yday, y = n, group = 1)) +
  geom_line(color = "steelblue") +
  geom_vline(xintercept = 365/2, color = "red")
filter: removed 91,340 rows (30%), 216,218 rows remaining
mutate: new variable 'yday' (double) with 366 unique values and 1% NA
count: now 366 rows and 2 columns, ungrouped
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).

Show code
# what day of year are the fish that we DONT have age for caught?
all.sallaa4 %>%
  filter(is.na(tot_age_year)) %>%
  mutate(yday = yday(date)) %>%
  count(yday) %>%   # counts per day
  ggplot(aes(x = yday, y = n, group = 1)) +
  geom_line(color = "steelblue") +
  geom_vline(xintercept = 365/2, color = "red")
filter: removed 216,218 rows (70%), 91,340 rows remaining
mutate: new variable 'yday' (double) with 366 unique values and 4% NA
count: now 366 rows and 2 columns, ungrouped
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).

Show code
# add a decimal to year to be able account for additional growth in th last year
all.sallaa5 <- all.sallaa4 %>%
  mutate(tot_age_decimyear = yday(date)/365)
mutate: new variable 'tot_age_decimyear' (double) with 367 unique values and 2%
NA

3e. Correct weights

There is no weight measurements in the French length at age data.

Show code
# Swedish weights are in milligrams*10. So divide by 100 to get grams.
all.sallaa5 %>%
  drop_na(tot_age_year) %>% # two na-aged Swedish fish with wrong lw-relationships removed
  ggplot() +
  geom_point(aes(length_mm, weight_g, color = sai_cou_code)) +
  facet_wrap(~sai_cou_code, scales = "free") +
  labs(subtitle = "no weight data on French and Irish fish")
drop_na: removed 91,340 rows (30%), 216,218 rows remaining
Warning: Removed 159492 rows containing missing values or values outside the scale range
(`geom_point()`).

Show code
all.sallaa6 <- all.sallaa5 %>%
  mutate(weight_g = if_else(sai_cou_code == "SWE", weight_g/100, weight_g))
mutate: changed 59,460 values (19%) of 'weight_g' (0 new NAs)
Show code
# light weight ones in Latvia, CHECK these with Janis at BIOR
all.sallaa6 %>%
  drop_na(tot_age_year) %>%
  filter(sai_cou_code == "LV",
         weight_g < 100,
         length_mm >200)
drop_na: removed 91,340 rows (30%), 216,218 rows remaining
filter: removed 215,117 rows (99%), 1,101 rows remaining
# A tibble: 1,101 × 23
   sai_cou_code fi_year sai_location origin length_mm weight_g sea_age_year
   <chr>          <dbl> <chr>        <chr>      <dbl>    <dbl>        <dbl>
 1 LV              1999 Amata        wild        660.     3.11            0
 2 LV              1999 Amata        wild        751.     5.09            0
 3 LV              1999 Amata        wild        670.     3.85            0
 4 LV              1999 Amata        wild        821.     6.15            0
 5 LV              1999 Amata        wild        771.     4.87            0
 6 LV              1999 Amata        wild        680.     3.78            0
 7 LV              2006 Amata        wild        821.     7.4             0
 8 LV              2006 Amata        wild        781.     6               0
 9 LV              2006 Amata        wild        620.     3.2             0
10 LV              2006 Amata        wild        680.     3.6             0
# ℹ 1,091 more rows
# ℹ 16 more variables: juvenile_age_year <dbl>, sex <chr>, date <dttm>,
#   fisa_y_4326 <dbl>, fisa_x_4326 <dbl>, asses.unit <dbl>, stock.origin <chr>,
#   stock.unit <chr>, region <chr>, sai_lfs_code <chr>, river <chr>,
#   region.gen <chr>, spat.unit <chr>, age.type <chr>, tot_age_year <dbl>,
#   tot_age_decimyear <dbl>
Show code
# still some odd l-w:s to fix if weight is used. 
all.sallaa6 %>%
  drop_na(tot_age_year) %>%
  ggplot() +
  geom_point(aes(length_mm, weight_g, color = sai_cou_code))
drop_na: removed 91,340 rows (30%), 216,218 rows remaining
Warning: Removed 159492 rows containing missing values or values outside the scale range
(`geom_point()`).

3d. Map and summary of length at age

Show code
# length at age by a suitable spatial aggregation (spat.unit) is defined as French regions, Swedish west coast and Baltic assessment units. This results in 10 spatial units (plus an NA group which will disappear when the French region information is complete) which should represent genetic and ecological units. There are observations without assessment units in the Baltic as the only spatial information we have is that they are from the Baltic as a whole. 
all.sallaa6 %>%
  ggplot(aes(sea_age_year, length_mm, color = spat.unit)) +
  geom_point() +
  facet_wrap( ~spat.unit) 
Warning: Removed 182270 rows containing missing values or values outside the scale range
(`geom_point()`).

Show code
plot_map_Euro +
  geom_point(data = all.sallaa6 %>%
               drop_na(fisa_x_4326, fisa_y_4326) %>%
               add_utm_columns(ll_names = c("fisa_x_4326", "fisa_y_4326"), utm_crs = 32633),
             aes(X*1000, Y*1000), color = "#440154FF", size = 0.3, show.legend = TRUE) +
  theme_sleek()
drop_na: no rows removed
Warning in plot_theme(plot): The `tagger.panel.tag.text` theme element is not defined in the element
hierarchy.

Show code
ggsave("laa_map.png", scale = 0.8)
Saving 5.6 x 4 in image
Warning in plot_theme(plot): The `tagger.panel.tag.text` theme element is not defined in the element
hierarchy.
Warning in plot_theme(plot): The `tagger.panel.tag.text` theme element is not defined in the element
hierarchy.
Show code
# ind counts by spatial unit
all.sallaa6 %>%
  #filter(age.type == "juve.only") %>%
  mutate(river1 = paste0(sai_cou_code,":",sai_location)) %>% 
  summarise(count = n(), .by = c(fi_year, sai_cou_code, river1)) %>%
  mutate(count.ind = as.factor(ifelse(count > 50, ">50",
                                  ifelse(count > 30 & count <= 50, ">30",
                                         ifelse(count > 10 & count <= 30, ">10",
                                         "1 - 10")))),
         count.ind = fct_reorder(count.ind, count)) %>%
  #filter(sai_cou_code == "FRA") %>%
  ggplot(aes(fi_year, river1, fill = count.ind, group = sai_cou_code)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light() +
  theme(axis_text=element_text(size=5)) +
  labs(title = "Length at age individuals by river and year",
       subtitle = "Juveniles only")
mutate: new variable 'river1' (character) with 119 unique values and 0% NA
summarise: now 1,796 rows and 4 columns, ungrouped
mutate: new variable 'count.ind' (factor) with 4 unique values and 0% NA
Warning in plot_theme(plot): The `axis_text` theme element is not defined in
the element hierarchy.
Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_tile()`).

Show code
# ind counts by sai_location excluding France
all.sallaa6 %>%
  mutate(river1 = paste0(sai_cou_code,":",sai_location)) %>% 
  summarise(count = n(), .by = c(fi_year, sai_cou_code, river1)) %>%
  mutate(count.ind = as.factor(ifelse(count > 50, ">50",
                                  ifelse(count > 30 & count <= 50, ">30",
                                         ifelse(count > 10 & count <= 30, ">10",
                                         "1 - 10")))),
         count.ind = fct_reorder(count.ind, count)) %>%
  filter(!sai_cou_code == "FRA") %>%
  ggplot(aes(fi_year, river1, fill = count.ind, group = sai_cou_code)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
mutate: new variable 'river1' (character) with 119 unique values and 0% NA
summarise: now 1,796 rows and 4 columns, ungrouped
mutate: new variable 'count.ind' (factor) with 4 unique values and 0% NA
filter: removed 1,287 rows (72%), 509 rows remaining
Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_tile()`).

Show code
# France only
all.sallaa6 %>%
  mutate(river1 = paste0(sai_cou_code,":",sai_location)) %>% 
  summarise(count = n(), .by = c(fi_year, sai_cou_code, river1)) %>%
  mutate(count.ind = as.factor(ifelse(count > 50, ">50",
                                  ifelse(count > 30 & count <= 50, ">30",
                                         ifelse(count > 10 & count <= 30, ">10",
                                         "1 - 10")))),
         count.ind = fct_reorder(count.ind, count)) %>%
  filter(sai_cou_code == "FRA") %>%
  ggplot(aes(fi_year, river1, fill = count.ind, group = sai_cou_code)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
mutate: new variable 'river1' (character) with 119 unique values and 0% NA
summarise: now 1,796 rows and 4 columns, ungrouped
mutate: new variable 'count.ind' (factor) with 4 unique values and 0% NA
filter: removed 509 rows (28%), 1,287 rows remaining

By suggested spatial aggregation:

Show code
# ind counts by a suggested spatial aggregation
all.sallaa6 %>%
  summarise(count = n(), .by = c(fi_year, spat.unit)) %>%
  mutate(Ind.count = as.factor(ifelse(count > 50, ">50",
                                      ifelse(count > 30 & count <= 50, ">30",
                                             ifelse(count > 10 & count <= 30, ">10",
                                                    "1 - 10")))),
         Ind.count = fct_reorder(Ind.count, count)) %>% 
  ggplot(aes(fi_year, spat.unit, fill = Ind.count)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  labs(y = "") +
  theme_light()
summarise: now 485 rows and 3 columns, ungrouped
mutate: new variable 'Ind.count' (factor) with 4 unique values and 0% NA
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_tile()`).

Show code
ggsave("laa_temp.png", scale = 0.8)
Saving 5.6 x 4 in image
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_tile()`).
Show code
# sex information by suggested spatial region
all.sallaa6 %>%
  summarise(ind.count = n(), .by = c(spat.unit, sex)) %>%
  ggplot(aes(ind.count, sex, fill = spat.unit)) +
  geom_bar(stat="identity", position = "stack") +
  scale_fill_viridis_d() 
summarise: now 39 rows and 3 columns, ungrouped

Show code
# Number of individuals by suggested spatial region
all.sallaa6 %>%
  summarise(ind.count = n(), .by = c(fi_year, spat.unit)) %>%
  ggplot(aes(ind.count, spat.unit)) +
  geom_bar(stat="identity", position = "stack", fill = "#440154FF") +
  geom_text(aes(label = ind.count), nudge_x = 2400)
summarise: now 485 rows and 3 columns, ungrouped

4. Fecundity at length

4a. Filter, clean and combine data

  • Remove NA values in length and fecundity (n.eggs)
  • Correct an obvious outlier in the Finnish data 1
  • Calculate total length (\(L_t\))from fork length (\(L_f\)) where needed in the French data based on the model: \(exp(0.05704206)*L_f^{0.99670332}\) (see below).

The kept variables are: length, sai_location, sai_cou_code, origin, fi_year, n.eggs. Spatial variables are added to the data from the tables created in 2.

Show code
# Model to convert French fork lengths in the fecundity data to total lengths
fra.salfec %>%
  drop_na(length.t, length.f) %>%
  lm(log(length.t) ~ log(length.f), data = .) %>%
  tidy() %>%
  pull(estimate) 
drop_na: removed 226 rows (45%), 276 rows remaining
[1] 0.05704206 0.99670332
Show code
fra.salfec %>%
  drop_na(length.t, length.f) %>%
  ggplot(aes(length.f, length.t)) +
  geom_point() +
  geom_line(aes(x = length.f,  y = exp(0.05704206)*length.f^0.99670332), col = "red") +
  labs(title = "fork to total length fit fecundity")
drop_na: removed 226 rows (45%), 276 rows remaining

Show code
# Combine the data
all.salfec <- swe.salfec %>%
  drop_na(length_mm, n.eggs) %>%
  dplyr::select("length_mm", "weight_g", "sai_location", "sai_cou_code", "origin", "fi_year", "n.eggs") %>%
  bind_rows(swe.salfec2 %>% 
              drop_na(length_mm, n.eggs) %>%
              dplyr::select("length_mm", "weight_g", "sai_location", "sai_cou_code", "origin", "fi_year", "n.eggs")) %>%
  bind_rows(fin.salfec1 %>% 
              drop_na(length_mm, n.eggs) %>%
              dplyr::select("length_mm", "weight_g", "sai_location", "sai_cou_code", "origin", "fi_year", "n.eggs")) %>%
  bind_rows(fin.salfec2 %>% 
              drop_na(length_mm, n.eggs) %>%
              # correct an obvious error (cm and not mm)
              mutate(length_mm = if_else(length_mm == 85, 850, length_mm),
                     weight_g = NA) %>% 
              dplyr::select("length_mm", "weight_g", "sai_location", "sai_cou_code", "origin", "fi_year", "n.eggs")) %>%
  left_join(SweFin.rivers) %>%
  bind_rows(fra.salfec %>%
              mutate(length_mm = if_else(is.na(length.t), exp(0.05704206)*length.f^0.99670332, length.t)) %>%
              drop_na(length_mm, n.eggs) %>%
              dplyr::select("length_mm", "weight_g", "sai_location", "sai_cou_code", "origin", "fi_year", "n.eggs") %>%
              left_join(fra.rivers2))
drop_na: removed 90 rows (12%), 676 rows remaining
drop_na: no rows removed
drop_na: removed 2 rows (1%), 191 rows remaining
drop_na: removed 16 rows (29%), 39 rows remaining
mutate: changed one value (3%) of 'length_mm' (0 new NAs)
        new variable 'weight_g' (logical) with one unique value and 100% NA
Joining with `by = join_by(sai_location)`
left_join: added 6 columns (fisa_y_4326, fisa_x_4326, asses.unit, stock.origin, stock.unit, …)
           > rows only in x                0
           > rows only in SweFin.rivers ( 51)
           > matched rows                926
           >                            =====
           > rows total                  926
mutate: new variable 'length_mm' (double) with 260 unique values and 0% NA
drop_na: removed 2 rows (<1%), 500 rows remaining
Joining with `by = join_by(sai_location)`
left_join: added 5 columns (river, stock.unit, region.gen, fisa_x_4326, fisa_y_4326)
           > rows only in x              7
           > rows only in fra.rivers2 ( 55)
           > matched rows              535    (includes duplicates)
           >                          =====
           > rows total                542
Show code
str(all.salfec)
tibble [1,468 × 15] (S3: tbl_df/tbl/data.frame)
 $ length_mm   : num [1:1468] 840 830 710 940 870 860 870 840 960 820 ...
 $ weight_g    : num [1:1468] 5200 5200 3700 8800 6300 6200 6900 6200 10300 5300 ...
 $ sai_location: chr [1:1468] "Umeälven" "Umeälven" "Umeälven" "Umeälven" ...
 $ sai_cou_code: chr [1:1468] "SWE" "SWE" "SWE" "SWE" ...
 $ origin      : chr [1:1468] "reared" "reared" "reared" "reared" ...
 $ fi_year     : num [1:1468] 2005 2005 2005 2005 2005 ...
 $ n.eggs      : num [1:1468] 9338 6433 6779 11208 9418 ...
 $ fisa_y_4326 : num [1:1468] 63.7 63.7 63.7 63.7 63.7 ...
 $ fisa_x_4326 : num [1:1468] 20.3 20.3 20.3 20.3 20.3 ...
 $ asses.unit  : num [1:1468] 2 2 2 2 2 2 2 2 2 2 ...
 $ stock.origin: chr [1:1468] "wild" "wild" "wild" "wild" ...
 $ stock.unit  : chr [1:1468] NA NA NA NA ...
 $ region      : chr [1:1468] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ river       : chr [1:1468] NA NA NA NA ...
 $ region.gen  : chr [1:1468] NA NA NA NA ...

4b. Create spatial units

Show code
all.salfec <- all.salfec %>%
  mutate(spat.unit = case_when(region == "Baltic.sea" ~ paste0(region,":AU-",asses.unit),
                             region == "Swedish.westcoast" ~ region, 
                             .default = region.gen))
mutate: new variable 'spat.unit' (character) with 9 unique values and <1% NA

4c. Map and temporal summary of fecundity

Seven French individuals from 1974 and 1975 lacks sai_location information.

Show code
# Fecundity at length all data 
all.salfec %>%
  ggplot(aes(length_mm, n.eggs, color = spat.unit)) +
  geom_point() +
  scale_fill_viridis_d() +
  theme_light()

Show code
# map
plot_map_Euro +
  geom_point(data = all.salfec %>% drop_na(fisa_y_4326,fisa_x_4326) %>% add_utm_columns(ll_names = c("fisa_x_4326", "fisa_y_4326"), utm_crs = 32633), aes(X*1000, Y*1000), size = 0.3, color = "#440154FF") 
drop_na: removed 7 rows (<1%), 1,461 rows remaining
Warning in plot_theme(plot): The `tagger.panel.tag.text` theme element is not defined in the element
hierarchy.

Show code
ggsave("fec_map.png", scale = 0.8)
Saving 5.6 x 4 in image
Warning in plot_theme(plot): The `tagger.panel.tag.text` theme element is not defined in the element
hierarchy.
The `tagger.panel.tag.text` theme element is not defined in the element
hierarchy.
Show code
# by sai_location
all.salfec %>%
  summarise(count = n(), .by = c(fi_year, sai_cou_code, origin, sai_location)) %>%
  mutate(Ind.count = as.factor(ifelse(count > 50, ">50",
                                  ifelse(count > 30 & count <= 50, ">30",
                                         ifelse(count > 10 & count <= 30, ">10",
                                         "1 - 10")))),
         Ind.count = fct_reorder(Ind.count, count)) %>% 
  ggplot(aes(fi_year, sai_location, fill = Ind.count)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
summarise: now 96 rows and 5 columns, ungrouped
mutate: new variable 'Ind.count' (factor) with 4 unique values and 0% NA

Show code
# by spat aggregation
all.salfec %>%
  summarise(count = n(), .by = c(fi_year, sai_cou_code, origin, spat.unit)) %>%
  mutate(Ind.count = as.factor(ifelse(count > 50, ">50",
                                  ifelse(count > 30 & count <= 50, ">30",
                                         ifelse(count > 10 & count <= 30, ">10",
                                         "1 - 10")))),
         Ind.count = fct_reorder(Ind.count, count)) %>% 
  ggplot(aes(fi_year, spat.unit, fill = Ind.count)) +
  geom_tile(color = "gray30") +
  scale_fill_viridis_d() +
  theme_light()
summarise: now 72 rows and 5 columns, ungrouped
mutate: new variable 'Ind.count' (factor) with 4 unique values and 0% NA

Show code
# Number of individuals fecundity 
all.salfec %>%
  drop_na(sai_location) %>%
  summarise(ind.count = n(), .by = spat.unit) %>%
  ggplot(aes(ind.count, spat.unit)) +
  geom_bar(stat="identity", position = "stack", fill = "#440154FF") +
  geom_text(aes(label = ind.count), nudge_x = 20)
drop_na: removed 7 rows (<1%), 1,461 rows remaining
summarise: now 8 rows and 2 columns, ungrouped

5. Build data sets and agregate by spatial aggregation

Show code
# length at age
salmon.laa <- all.sallaa6

# fecundity
salmon.fec <- all.salfec 

str(salmon.fec)
tibble [1,468 × 16] (S3: tbl_df/tbl/data.frame)
 $ length_mm   : num [1:1468] 840 830 710 940 870 860 870 840 960 820 ...
 $ weight_g    : num [1:1468] 5200 5200 3700 8800 6300 6200 6900 6200 10300 5300 ...
 $ sai_location: chr [1:1468] "Umeälven" "Umeälven" "Umeälven" "Umeälven" ...
 $ sai_cou_code: chr [1:1468] "SWE" "SWE" "SWE" "SWE" ...
 $ origin      : chr [1:1468] "reared" "reared" "reared" "reared" ...
 $ fi_year     : num [1:1468] 2005 2005 2005 2005 2005 ...
 $ n.eggs      : num [1:1468] 9338 6433 6779 11208 9418 ...
 $ fisa_y_4326 : num [1:1468] 63.7 63.7 63.7 63.7 63.7 ...
 $ fisa_x_4326 : num [1:1468] 20.3 20.3 20.3 20.3 20.3 ...
 $ asses.unit  : num [1:1468] 2 2 2 2 2 2 2 2 2 2 ...
 $ stock.origin: chr [1:1468] "wild" "wild" "wild" "wild" ...
 $ stock.unit  : chr [1:1468] NA NA NA NA ...
 $ region      : chr [1:1468] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ river       : chr [1:1468] NA NA NA NA ...
 $ region.gen  : chr [1:1468] NA NA NA NA ...
 $ spat.unit   : chr [1:1468] "Baltic.sea:AU-2" "Baltic.sea:AU-2" "Baltic.sea:AU-2" "Baltic.sea:AU-2" ...
Show code
str(salmon.laa)
tibble [307,558 × 23] (S3: tbl_df/tbl/data.frame)
 $ sai_cou_code     : chr [1:307558] "SWE" "SWE" "SWE" "SWE" ...
 $ fi_year          : num [1:307558] 2015 2015 2015 2015 2015 ...
 $ sai_location     : chr [1:307558] "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" "Östersjön (hela) ICES SD 22-32" ...
 $ origin           : chr [1:307558] "wild" "wild" "wild" "reared" ...
 $ length_mm        : num [1:307558] 990 840 860 880 860 1000 860 820 830 840 ...
 $ weight_g         : num [1:307558] 9300 5400 5900 4400 5000 8300 5500 5100 5300 5500 ...
 $ sea_age_year     : num [1:307558] 4 2 2 3 3 3 2 2 3 2 ...
 $ juvenile_age_year: num [1:307558] 3 3 2 NA 4 3 3 3 2 3 ...
 $ sex              : chr [1:307558] "m" "f" "f" "f" ...
 $ date             : POSIXct[1:307558], format: "2017-09-27" "2015-06-14" ...
 $ fisa_y_4326      : num [1:307558] 58.5 58.5 58.5 58.5 58.5 ...
 $ fisa_x_4326      : num [1:307558] 19.8 19.8 19.8 19.8 19.8 ...
 $ asses.unit       : num [1:307558] NA NA NA NA NA NA NA NA NA NA ...
 $ stock.origin     : chr [1:307558] NA NA NA NA ...
 $ stock.unit       : chr [1:307558] NA NA NA NA ...
 $ region           : chr [1:307558] "Baltic.sea" "Baltic.sea" "Baltic.sea" "Baltic.sea" ...
 $ sai_lfs_code     : chr [1:307558] NA NA NA NA ...
 $ river            : chr [1:307558] NA NA NA NA ...
 $ region.gen       : chr [1:307558] NA NA NA NA ...
 $ spat.unit        : chr [1:307558] "Baltic.sea:AU-NA" "Baltic.sea:AU-NA" "Baltic.sea:AU-NA" "Baltic.sea:AU-NA" ...
 $ age.type         : chr [1:307558] "both" "both" "both" "sea.only" ...
 $ tot_age_year     : num [1:307558] 7 5 4 3 7 6 5 5 5 5 ...
 $ tot_age_decimyear: num [1:307558] 0.74 0.452 0.452 0.452 0.452 ...
Show code
# salmon.laa %>%
#   distinct(fi_year,spat.unit) %>%
#   count(spat.unit) %>%
#   mutate(m = median(n))
#     
# salmon.laa %>%
#   count(sex,sai_cou_code) %>%
#   mutate(x = n, .by = c(sex,sai_cou_code))
# 
# salmon.fec %>% 
#   distinct(spat.unit, fi_year) %>%
#   count(spat.unit) %>%
# 
# salmon.laa %>%
#   summarise(n = n(), .by = c(sai_cou_code, spat.unit, ))

6. Save data

Show code
saveRDS(salmon.laa, paste0(file = paste0(home,"/data/data-for-2-2/salmon-laa_",Sys.Date(),".RData")))
saveRDS(salmon.fec, paste0(file = paste0(home,"/data/data-for-2-2/salmon-fec_",Sys.Date(),".RData")))